Nutrilip_cleandata_Cow
Libraries
# load libraries
library(readxl)
library(visdat)
library(data.table)
library(tidyverse)
library(reshape2)
library(ggpubr)
#libraries for graphics
library(ggplot2)
library(plotly) #ggplot2 interactive plot
library(gridExtra)
#libraries for PCA
library(FactoMineR)
library(factoextra)
#libraries for imputation
library(impute)
library(VIM)
#libraries for PLSDA (can also perfom PCA methods)
library(ropls)
library(mixOmics)The code below shows the data set from the experiment AQ664-Nutrilip, the aim is to statistically analyze the data to be used later in a post-treatment analysis with diverse bioinformatics tools.
Data preparation
Import datasets from AQ664_Results_Lait.xlsx
Preparation of the data frame
#tableau dat_nutr as data frame
dat_nutr <- as.data.frame(dat_nutr)
dim(dat_nutr)#dimension of dat_nutr## [1] 32 157
#create an identifiant for each row : COWnumber_Diet
rownames(dat_nutr) <- paste(paste(dat_nutr$Species, dat_nutr$Id,sep=""), dat_nutr$Diet, sep="_")
dat_nutr_sum <- dat_nutr[,grep(pattern ="Sum", colnames(dat_nutr))]
#remove sum of each classe
dat_nutr <- dat_nutr[,-grep(pattern ="Sum", colnames(dat_nutr))]
#remove sum saturated/unsaturated
colnames(dat_nutr)[grep(pattern ="aturated", colnames(dat_nutr))]## [1] "Saturated Cer" "Unsaturated Cer" "Saturated SM" "Unsaturated SM"
dat_nutr <- dat_nutr[,-grep(pattern ="aturated", colnames(dat_nutr))]
dat_nutr <- dat_nutr[,-which(colnames(dat_nutr) %in% c("Id"))]
#remove qualitative variables
dat_nutrb <- dat_nutr[,-which(colnames(dat_nutr) %in% c("Id", "Group","Period","Species","Diet"))]
#transpose data for having in row the lipids and in columns the samples
dat_nutr2 <- data.frame(t(dat_nutrb))
#dimension of the data frame
dim(dat_nutr2)## [1] 141 32
#visualization of the data frame dat_nutr having in row the lipids and in columns the samples
dat_nutr2 <- data.frame(t(dat_nutrb))
vis_dat(dat_nutr2)Cow Data
pre-analysis in data
quali_var <- c("Id","Species","Group","Period","Diet")
class_lipids <- c("TG", "Cer", "PC", "PE", "SM", "PI", "PS")
cowdata <- dat_nutr[dat_nutr$Species=="Cow",]
vis_dat(cowdata)## $Species
##
## Cow
## 16
##
## $Group
##
## 1 2 3 4
## 4 4 4 4
##
## $Period
##
## P1 P2 P3 P4
## 4 4 4 4
##
## $Diet
##
## COS CTL HPO MAP
## 4 4 4 4
Missing value
#transform O in NA
cowdata[cowdata==0] <- NA
#visualize NA in the dataset -> check if missing data patterns are strange or not
vis_miss(cowdata)#loop to compute % of missing value per column
NAcow <- apply(cowdata, 2, function(x) 100*sum(is.na(x)/length(x)))
#see frequence
table(NAcow)## NAcow
## 0 6.25 100
## 142 2 1
#first row = % of missing value, second row = number of lipid
#see lipid with at least one missing value
NAcow[NAcow>0]## TG(18:0) TG(60:0) TG(62:1)
## 6.25 100.00 6.25
In the table we observe in black the missing values for each lipid, only 3 lipids are observed in which there is a missing value ((TG:60:0), (TG:18:0), TG(62:1)).
Exploratory analysis
Total abundance analysis
The abundance analysis helps us estimate the number of individuals in a population (lipids per animal).
#For each animal, sum all lipids -> total abundance for each animal
barplot(rowSums(cowdata2[,-which(colnames(cowdata2) %in% quali_var)],na.rm=T),
las=2,#ranspose the name on X-axis
main = "Total abundance")Graphic showing the total abundance with no transformed data
#For each animal, log2 transformation (log2(x=1)), then sum all lipids -> total abundance for each animal after log2 transformations
barplot(rowSums(log2(cowdata2[,-which(colnames(cowdata2) %in% quali_var)]+1),na.rm=T),
las=2,#ranspose the name on X-axis
main = "Total abundance, log2 transformation")Graphic showing the total abundance with transformed data log2
Boxplot analysis
A Boxplot shows the quantitative distribution of data in a way that facilitates comparison between variables, or across categorical levels of variables.
Boxplot with data not transformed
#preparation data set in order to use ggplot2 functions
cowdata2$Group <- as.factor(cowdata2$Group)
cowdata2$animal <- rownames(cowdata2)
meltcow <- reshape2::melt(cowdata2)## Using Species, Group, Period, Diet, animal as id variables
#Data with no transformation
pbox_beflog <- ggplot(meltcow, aes(x=animal, y = value)) +
geom_boxplot() +
ggtitle("Boxplot of samples (raw data not transformed)")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
pbox_beflog## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Box plot by diet with data transformed in log2
#Log2 transformation by diet
pbox_log <- ggplot(meltcow, aes(x=animal, y = log2(value +1), fill =Diet)) +
geom_boxplot()+
ggtitle("Boxplot on log2 transformed data, colored by diet")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
pbox_log## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Box plot by group with data transformed in log2
#Log2 transformation by group
ggplot(meltcow, aes(x=animal, y = log2(value +1), fill = Group)) +
geom_boxplot()+
ggtitle("Boxplot on log2 transformed data, colored by Group")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
Box plot by lipid class and diet with data transformed in log2
box_cow_lipsclass <- lapply(class_lipids, function(x){
df_red <- meltcow[grep(x,meltcow$variable),]
ggplot(df_red, aes(x=animal, y = log2(value +1), fill = Diet)) +
geom_boxplot()+
ggtitle(paste("Boxplot of samples with lipids class", x , sep=" "))+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
})
# do.call(grid.arrange, c(box_cow_lipsclass,ncol=3))
do.call(ggarrange, c(box_cow_lipsclass,common.legend = TRUE, legend="bottom"))## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
## Removed 2 rows containing non-finite values (`stat_boxplot()`).
Density analysis
The probability density function in statistics is what describes the relative probability according to which said random variable will take a certain value. It is intended to visualize a Gaussian distribution of data.
Density by sample (by animal)
#Data not transformed
pdens_beflog <- ggplot(meltcow, aes(x= value, colour = animal)) +
geom_density() +
ggtitle("Density of each sample (raw data)")
pdens_beflog## Warning: Removed 2 rows containing non-finite values (`stat_density()`).
#Data transformed in log2
pdens_log <- ggplot(meltcow, aes(x= log2(value+1), colour = animal)) +
geom_density()+
ggtitle("Density of each sample (log2 transformed)")
pdens_log## Warning: Removed 2 rows containing non-finite values (`stat_density()`).
## Warning: Removed 2 rows containing non-finite values (`stat_density()`).
Density by lipid class and animal with data transformed in log2
# Density of each cow, colored by animal
dens_cow_lipsclass <- lapply(class_lipids, function(x){
df_red <- meltcow[grep(x,meltcow$variable),]
ggplot(df_red, aes(x=log2(value +1), colour = animal)) +
geom_density()+
ggtitle(x)
})
# do.call(grid.arrange, c(dens_cow_lipsclass,ncol=3))
do.call(ggarrange, c(dens_cow_lipsclass,common.legend = TRUE, legend="right"))## Warning: Removed 2 rows containing non-finite values (`stat_density()`).
## Removed 2 rows containing non-finite values (`stat_density()`).
Density by lipid class and diet with data transformed in log2
# Density of each cow, colored by diet
dens_cow_2 <- lapply(class_lipids, function(x){
df_red <- meltcow[grep(x,meltcow$variable),]
ggplot(df_red, aes(x=log2(value +1), colour = Diet, fill=animal)) +
geom_density(alpha=0.01)+
ggtitle(x)
})
# do.call(grid.arrange, c(dens_cow_2,ncol=3))
do.call(ggarrange, c(dens_cow_2,common.legend = TRUE, legend="right"))## Warning: Removed 2 rows containing non-finite values (`stat_density()`).
## Removed 2 rows containing non-finite values (`stat_density()`).
Data transformation in log2
PCA analysis - global lipids - log2 transformed data
Principal components analysis (PCA) is used for dimention, reduction and exploration of complex high dimentional data set.
Be careful: the missing values was imputed temporarily by mean by default when you use the functions from the package FactoMineR.
#perform mode in log2 transformed data
pcacow <- PCA(cowlog2,
scale.unit =TRUE,
quali.sup = which(colnames(cowlog2) %in% c(quali_var,"animal"))) #add qualitative in supplemental variable## Warning in PCA(cowlog2, scale.unit = TRUE, quali.sup = which(colnames(cowlog2)
## %in% : Missing values are imputed by the mean of the variable: you should use
## the imputePCA function of the missMDA package
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
eigenvalue baroplot
Sample representation
# fviz_pca_ind(pcacow, habillage = "Diet")
#loop to display sample projection by coloring sample by different qualitative variable (group, period and diet)
lapply(quali_var[-c(1:2)], function(x){
fviz_pca_ind(pcacow, habillage = x,
addEllipses = T) +
ggtitle(paste("PCA cow", x, sep=": colored by "))})## [[1]]
##
## [[2]]
##
## [[3]]
Variable representation (correlation circle)
We still have 2 missing values to impute.
Imputation of missing values
Imputation it’s the process of replacing missing data with substituted values. Imputation made from data transformed into log 2, KNN method from VIM Package.
Imputation : KNN with k = 3
df <- cowlog2[,-which(colnames(cowlog2) %in% c("animal", quali_var))]
M_imp <- VIM::kNN(df, k=3,imp_var = FALSE)
cow_imp <- cowlog2
cow_imp[,-which(colnames(cow_imp) %in% c("animal",quali_var))] <- M_imp
cowlog2[,c("TG(18:0)","TG(62:1)")]## TG(18:0) TG(62:1)
## Cow101_HPO 0.037608374 NA
## Cow102_MAP 0.038559580 0.04848969
## Cow103_COS 0.017173687 0.02649172
## Cow104_CTL 0.023738151 0.07899748
## Cow105_HPO 0.016181796 0.10919347
## Cow106_MAP 0.016202041 0.03042843
## Cow107_COS 0.010153923 0.02172808
## Cow108_CTL 0.012251876 0.06794388
## Cow109_HPO 0.015191296 0.10205131
## Cow110_MAP 0.012217135 0.04474930
## Cow111_COS 0.005574338 0.01618820
## Cow112_CTL 0.028998136 0.11391545
## Cow113_HPO 0.008710565 0.12002070
## Cow114_MAP 0.009440814 0.04107281
## Cow115_COS NA 0.05458061
## Cow116_CTL 0.017198007 0.11781476
## TG(18:0) TG(62:1)
## Cow101_HPO 0.037608374 0.10205131
## Cow102_MAP 0.038559580 0.04848969
## Cow103_COS 0.017173687 0.02649172
## Cow104_CTL 0.023738151 0.07899748
## Cow105_HPO 0.016181796 0.10919347
## Cow106_MAP 0.016202041 0.03042843
## Cow107_COS 0.010153923 0.02172808
## Cow108_CTL 0.012251876 0.06794388
## Cow109_HPO 0.015191296 0.10205131
## Cow110_MAP 0.012217135 0.04474930
## Cow111_COS 0.005574338 0.01618820
## Cow112_CTL 0.028998136 0.11391545
## Cow113_HPO 0.008710565 0.12002070
## Cow114_MAP 0.009440814 0.04107281
## Cow115_COS 0.010153923 0.05458061
## Cow116_CTL 0.017198007 0.11781476
PCA analysis - imputed and log2 transformed data
PCA by Global lipids
#Model
pcacow <- PCA(cow_imp,
scale.unit =TRUE,
quali.sup =which(colnames(cow_imp) %in% c(quali_var,"animal")),
graph=FALSE)
#eigenvalue baroplot
fviz_eig(pcacow)#Sample representation
lapply(quali_var[-c(1:2)], function(x){
fviz_pca_ind(pcacow,
habillage = x,
addEllipses = T) +
ggtitle(paste("PCA cow", x, sep=": colored by "))})## [[1]]
##
## [[2]]
##
## [[3]]
PCA by class of lipids
#loop to perform a PCA by using lipids of one class of lipids
pca_byclass <- lapply(class_lipids, function(x){
newdata <- cow_imp[,c(which(colnames(cow_imp) %in% quali_var),
grep(x, colnames(cow_imp)))]
pca_res <- PCA(newdata,
scale.unit =TRUE,
quali.sup =which(colnames(newdata) %in% c(quali_var)),
graph=FALSE)
fig <- lapply(quali_var[-c(1:2)], function(z){
fviz_pca_ind(pca_res, habillage = z,
addEllipses = T) +
ggtitle(paste("PCA cow for class",paste(x, z, sep=": colored by "), sep =" "))})
pvar <- fviz_pca_var(pca_res)
fig[[4]] <- pvar
do.call("grid.arrange", c(fig, ncol=2))
})PLSDA on imputed and log2 transformed data
Partial Least Square-Discriminant Analysis combines PLS regression and linear discrimination analysis. It uses latent variables to capture the most important relationship between predictor and response variable, making it effective for distinguishing different groups or categories.
Different packages to make PLSDA
PLSDA with package ropls
## PLS-DA
## 16 samples x 140 variables and 1 response
## standard scaling of predictors and response(s)
## R2X(cum) R2Y(cum) Q2(cum) RMSEE pre ort pR2Y pQ2
## Total 0.766 0.858 0.703 0.189 3 0 0.05 0.05
#correlation circle for components 1 vs 2
plot(res_opls_cow,
typeVc = "correlation",
parCompVi = c(1,2))#to display sample & variable projection with components 1 vs 3
plot(res_opls_cow,
typeVc = "x-score",
parCompVi = c(1,3))#Get VIP (Varianle Importance Projection)
vipOpls_cow <- getVipVn(res_opls_cow)
vipOpls_cow <- vipOpls_cow[order(vipOpls_cow, decreasing = T)]
vipOpls_cow[vipOpls_cow>1]## PC(38:4) PC(38:3) PI(38:3) Cer(d18:1/22:0) Cer(d18:1/26:0)
## 1.564612 1.529985 1.443079 1.437573 1.424860
## Cer(d18:1/16:0) PI(34:1) Cer(d18:1/24:1) PI(38:4) Cer(d18:1/24:0)
## 1.364922 1.339593 1.324341 1.286682 1.277479
## Cer(d18:1/26:1) TG(52:2) PC(36:4) TG(60:4) TG(50:2)
## 1.267781 1.256479 1.244360 1.233228 1.218956
## TG(50:1) TG(54:4) TG(52:1) PI(38:1) PC(38:5)
## 1.217758 1.213093 1.190022 1.187193 1.174374
## PI(38:2) Cer(d18:1/18:0) SM(18:1/20:1) TG(62:1) Cer(d18:1/18:1)
## 1.173410 1.171935 1.161769 1.144289 1.139139
## TG(58:1) TG(48:0) PC(40:3) TG(54:5) PI(38:5)
## 1.135910 1.130446 1.123575 1.108583 1.104565
## PE(38:4) PS(36:0) PS(38:2) TG(48:1) Cer(d18:1/16:1)
## 1.102051 1.080459 1.076954 1.069099 1.069094
## PC(38:2) PE(36:4) PI(40:4) PE(38:6) PI(36:0)
## 1.066015 1.062203 1.061909 1.061378 1.059996
## TG(56:3) TG(56:4) TG(38:0) PI(36:1) TG(46:0)
## 1.055663 1.055387 1.053804 1.051322 1.050878
## PS(36:1) TG(40:1) TG(50:3) PS(38:4) TG(40:0)
## 1.047058 1.044855 1.044358 1.041001 1.039828
## PE(36:1) PE(40:3) TG(52:3) TG(36:0) TG(38:1)
## 1.038492 1.038294 1.036439 1.035915 1.024883
## PI(40:3) TG(42:0) TG(44:0) TG(42:1) Cer(d18:1/20:0)
## 1.023220 1.022198 1.022026 1.016081 1.010702
## TG(46:1) PI(40:5) TG(44:1)
## 1.007812 1.007507 1.006021
PLSDA with package MixOmics
#other way: mixOmics
resMO_cow <- mixOmics::plsda(X = cow_imp[,-c(1:4,145)],
Y = cow_imp$Diet,
ncomp=5)
resMO_cow$prop_expl_var## $X
## comp1 comp2 comp3 comp4 comp5
## 0.43229274 0.25860603 0.07554168 0.06980015 0.03810279
##
## $Y
## comp1 comp2 comp3 comp4 comp5
## 0.3333333 0.3333332 0.3332985 0.1198171 0.0733959
#sample and variable representation, components 1 vs 3
plotIndiv(resMO_cow,ellipse = T, comp=c(1,3))Sparse version (With selection)
##sparse: selection top10 for each component
resMO_sparse_cow <- mixOmics::splsda(X = cow_imp[,-c(1:4,145)],
Y = cow_imp$Diet,
keepX = c(10,10,10), ncomp=3)
#sample and variable representation, components 1 vs 2
plotIndiv(resMO_sparse_cow,ellipse = T)#sample and variable representation, components 2 vs 3
plotIndiv(resMO_sparse_cow,ellipse = T, comp=c(2,3))Normalisation by sum of class for each class
cowdata3 <- cow_imp %>%
mutate(Sum_TG = rowSums(dplyr::select(cow_imp, starts_with("TG")), na.rm=T),
Sum_Cer = rowSums(dplyr::select(cow_imp, starts_with("Cer")), na.rm=T),
Sum_PC = rowSums(dplyr::select(cow_imp, starts_with("PC")), na.rm=T),
Sum_PE = rowSums(dplyr::select(cow_imp[, -which(colnames(cow_imp) %in% c("animal", quali_var))], starts_with("PE")), na.rm=T),
Sum_SM = rowSums(dplyr::select(cow_imp, starts_with("SM")), na.rm=T),
Sum_PI = rowSums(dplyr::select(cow_imp, starts_with("PI")), na.rm=T),
Sum_PS = rowSums(dplyr::select(cow_imp, starts_with("PS")), na.rm=T))
dim(cowdata3)## [1] 16 152
#loop for each class: normalize lipids by the sum of its class
data_normbysumclass <- lapply(class_lipids, function(x){
cowdata3[, -which(colnames(cowdata3) %in% c("animal", quali_var))] %>%
dplyr::select(starts_with(x)) %>% #select all lipid for class x
mutate("Sum" = rowSums(dplyr::select(cowdata3[, -which(colnames(cowdata3) %in% c("animal", quali_var))],starts_with(x)), na.rm=T)) %>% #create a new column "Sum" = sum of each lipid of class x
mutate_at(vars(starts_with(x)), list(normsum = ~./Sum)) %>% #divise each lipids by the variable "Sum"
dplyr::select(ends_with("normsum")) #select only "normalized" lipids
})
#display results: uncomment the following row to see all elements of the list:
# data_normbysumclassYou obtain a list. Each item in the list corresponds to a lipid class, and the lipids have been normalized by the sum of the class for each individual.
Let’s check that the sum of the row is 1.
## [[1]]
## Cow101_HPO Cow102_MAP Cow103_COS Cow104_CTL Cow105_HPO Cow106_MAP Cow107_COS
## 1 1 1 1 1 1 1
## Cow108_CTL Cow109_HPO Cow110_MAP Cow111_COS Cow112_CTL Cow113_HPO Cow114_MAP
## 1 1 1 1 1 1 1
## Cow115_COS Cow116_CTL
## 1 1
##
## [[2]]
## Cow101_HPO Cow102_MAP Cow103_COS Cow104_CTL Cow105_HPO Cow106_MAP Cow107_COS
## 1 1 1 1 1 1 1
## Cow108_CTL Cow109_HPO Cow110_MAP Cow111_COS Cow112_CTL Cow113_HPO Cow114_MAP
## 1 1 1 1 1 1 1
## Cow115_COS Cow116_CTL
## 1 1
##
## [[3]]
## Cow101_HPO Cow102_MAP Cow103_COS Cow104_CTL Cow105_HPO Cow106_MAP Cow107_COS
## 1 1 1 1 1 1 1
## Cow108_CTL Cow109_HPO Cow110_MAP Cow111_COS Cow112_CTL Cow113_HPO Cow114_MAP
## 1 1 1 1 1 1 1
## Cow115_COS Cow116_CTL
## 1 1
##
## [[4]]
## Cow101_HPO Cow102_MAP Cow103_COS Cow104_CTL Cow105_HPO Cow106_MAP Cow107_COS
## 1 1 1 1 1 1 1
## Cow108_CTL Cow109_HPO Cow110_MAP Cow111_COS Cow112_CTL Cow113_HPO Cow114_MAP
## 1 1 1 1 1 1 1
## Cow115_COS Cow116_CTL
## 1 1
##
## [[5]]
## Cow101_HPO Cow102_MAP Cow103_COS Cow104_CTL Cow105_HPO Cow106_MAP Cow107_COS
## 1 1 1 1 1 1 1
## Cow108_CTL Cow109_HPO Cow110_MAP Cow111_COS Cow112_CTL Cow113_HPO Cow114_MAP
## 1 1 1 1 1 1 1
## Cow115_COS Cow116_CTL
## 1 1
##
## [[6]]
## Cow101_HPO Cow102_MAP Cow103_COS Cow104_CTL Cow105_HPO Cow106_MAP Cow107_COS
## 1 1 1 1 1 1 1
## Cow108_CTL Cow109_HPO Cow110_MAP Cow111_COS Cow112_CTL Cow113_HPO Cow114_MAP
## 1 1 1 1 1 1 1
## Cow115_COS Cow116_CTL
## 1 1
##
## [[7]]
## Cow101_HPO Cow102_MAP Cow103_COS Cow104_CTL Cow105_HPO Cow106_MAP Cow107_COS
## 1 1 1 1 1 1 1
## Cow108_CTL Cow109_HPO Cow110_MAP Cow111_COS Cow112_CTL Cow113_HPO Cow114_MAP
## 1 1 1 1 1 1 1
## Cow115_COS Cow116_CTL
## 1 1
Boxplot
#TO do: rework for ggplot2 graphics
# lbox_normclass <- lapply(data_normbysumclass, function(x) {
# par(mfrow = c(1,2))
# boxplot(x)
# boxplot(t(x),las=2)
# })
#version ggplot2
lbox_normclass <- lapply(data_normbysumclass, function(x) {
x <- data.frame(Animal = rownames(x),
group = str_remove(str_extract(rownames(x), pattern = "\\_[A-Z]+"),pattern="_"),
x)
df <- reshape2::melt(x)
pbox_var <- ggplot(df, aes(x=variable, y = value)) +
geom_boxplot() +
ggtitle("Boxplot of variables")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
pbox_samp <- ggplot(df, aes(x=Animal, y = value,fill=group)) +
geom_boxplot() +
ggtitle("Boxplot of variables")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
grid.arrange(pbox_var, pbox_samp, ncol=2)
})## Using Animal, group as id variables
## Using Animal, group as id variables
## Using Animal, group as id variables
## Using Animal, group as id variables
## Using Animal, group as id variables
## Using Animal, group as id variables
## Using Animal, group as id variables
Now we’ll reduce the list to data.frame. For that, we use the
function do.call and explain that we want to paste the
columns together using cbind.
#reduce to dataframe
cow_normbysum <- do.call(cbind.data.frame, data_normbysumclass)
dim(cow_normbysum)## [1] 16 140
Remove normsum in the name of variables:
colnames(cow_normbysum) <- stringr::str_remove(string = colnames(cow_normbysum),
pattern="_normsum")Boxplot
df <- reshape2::melt(data.frame("Animal" = rownames(cow_normbysum),
"group" = str_remove(str_extract(rownames(cow_normbysum),
pattern = "\\_[A-Z]+"),pattern="_"),
cow_normbysum))## Using Animal, group as id variables
ggplot(df, aes(x=Animal, y = value,fill=group)) +
geom_boxplot() +
ggtitle("Boxplot of variables")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))Export cow datasets
#export dataset, log2 transformed, imputation by knn (k=3), each lipid normalized by the sum of his class
write.table(data.frame(ID = rownames(cow_normbysum),cow_normbysum),
file = "../data/cow_log2imp_normbysum.csv",
sep=";",
row.names=FALSE)
# Export imputed (knn, k=3) and log2 transformed data
write.table(data.frame(ID = rownames(cow_imp),
cow_imp[,-which(colnames(cow_imp) %in% c("Species","Group",
"Period","animal"))]),
file = "../data/cow_log2imp.csv",
sep=";",
row.names=FALSE)SessionInfo
## R version 4.3.1 (2023-06-16 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=French_France.utf8 LC_CTYPE=French_France.utf8
## [3] LC_MONETARY=French_France.utf8 LC_NUMERIC=C
## [5] LC_TIME=French_France.utf8
##
## time zone: Europe/Paris
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] mixOmics_6.26.0 lattice_0.22-5 MASS_7.3-60 ropls_1.34.0
## [5] VIM_6.2.2 colorspace_2.1-0 impute_1.76.0 factoextra_1.0.7
## [9] FactoMineR_2.9 gridExtra_2.3 plotly_4.10.3 ggpubr_0.6.0
## [13] reshape2_1.4.4 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
## [17] dplyr_1.1.3 purrr_1.0.2 readr_2.1.4 tidyr_1.3.0
## [21] tibble_3.2.1 ggplot2_3.4.4 tidyverse_2.0.0 data.table_1.14.8
## [25] visdat_0.6.0 readxl_1.4.3
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 vcd_1.4-11
## [3] rstudioapi_0.15.0 jsonlite_1.8.7
## [5] MultiAssayExperiment_1.28.0 magrittr_2.0.3
## [7] estimability_1.4.1 farver_2.1.1
## [9] rmarkdown_2.25 zlibbioc_1.48.0
## [11] vctrs_0.6.4 RCurl_1.98-1.13
## [13] MultiDataSet_1.30.0 rstatix_0.7.2
## [15] htmltools_0.5.7 S4Arrays_1.2.0
## [17] broom_1.0.5 cellranger_1.1.0
## [19] SparseArray_1.2.2 sass_0.4.7
## [21] bslib_0.6.1 htmlwidgets_1.6.3
## [23] plyr_1.8.9 emmeans_1.8.9
## [25] zoo_1.8-12 cachem_1.0.8
## [27] qqman_0.1.9 igraph_1.5.1
## [29] lifecycle_1.0.4 pkgconfig_2.0.3
## [31] Matrix_1.6-4 R6_2.5.1
## [33] fastmap_1.1.1 GenomeInfoDbData_1.2.11
## [35] MatrixGenerics_1.14.0 digest_0.6.33
## [37] rARPACK_0.11-0 S4Vectors_0.40.2
## [39] RSpectra_0.16-1 crosstalk_1.2.1
## [41] ellipse_0.5.0 GenomicRanges_1.54.1
## [43] labeling_0.4.3 fansi_1.0.5
## [45] timechange_0.2.0 httr_1.4.7
## [47] abind_1.4-5 compiler_4.3.1
## [49] proxy_0.4-27 withr_2.5.2
## [51] backports_1.4.1 BiocParallel_1.36.0
## [53] carData_3.0-5 highr_0.10
## [55] ggsignif_0.6.4 DelayedArray_0.28.0
## [57] corpcor_1.6.10 scatterplot3d_0.3-44
## [59] flashClust_1.01-2 tools_4.3.1
## [61] lmtest_0.9-40 ranger_0.16.0
## [63] nnet_7.3-19 glue_1.6.2
## [65] cluster_2.1.6 generics_0.1.3
## [67] gtable_0.3.4 tzdb_0.4.0
## [69] class_7.3-22 rmdformats_1.0.4
## [71] hms_1.1.3 sp_2.1-2
## [73] car_3.1-2 utf8_1.2.4
## [75] XVector_0.42.0 BiocGenerics_0.48.1
## [77] ggrepel_0.9.4 pillar_1.9.0
## [79] limma_3.58.1 robustbase_0.99-1
## [81] tidyselect_1.2.0 knitr_1.45
## [83] bookdown_0.37 IRanges_2.36.0
## [85] SummarizedExperiment_1.32.0 stats4_4.3.1
## [87] xfun_0.41 Biobase_2.62.0
## [89] statmod_1.5.0 matrixStats_1.1.0
## [91] DEoptimR_1.1-3 DT_0.30
## [93] stringi_1.8.1 lazyeval_0.2.2
## [95] yaml_2.3.7 boot_1.3-28.1
## [97] codetools_0.2-19 evaluate_0.23
## [99] laeken_0.5.2 multcompView_0.1-9
## [101] cli_3.6.1 xtable_1.8-4
## [103] munsell_0.5.0 jquerylib_0.1.4
## [105] Rcpp_1.0.11 GenomeInfoDb_1.38.1
## [107] parallel_4.3.1 ellipsis_0.3.2
## [109] leaps_3.1 calibrate_1.7.7
## [111] bitops_1.0-7 viridisLite_0.4.2
## [113] mvtnorm_1.2-4 scales_1.3.0
## [115] e1071_1.7-14 crayon_1.5.2
## [117] rlang_1.1.2 cowplot_1.1.1